#!/usr/bin/env python3

import argparse
import csv
import sys

def parse_table(rows, table_title):
    """
    Parse a CSV table out of an iterator over rows.

    The table is expected to be laid out as a row containing only `table_title`,
    followed by an empty row, then a row of headers, then the data rows. The table
    ends at the first empty row following the headers, or at the end of the input.
    Only the first table with a matching title is extracted, and rows located after
    the end of that table are not consumed.

    Return a tuple containing (extracted headers, extracted rows).

    Raise an AssertionError if the table cannot be found, if the input ends before
    the headers, if the row after the title is not empty, or if the table has no rows.
    """
    rows_iter = iter(rows)
    headers = None
    extracted = []

    # Skip everything up to (and including) the title of the table.
    found_title = False
    for row in rows_iter:
        if row == [table_title]:
            found_title = True
            break

    # Errors are raised explicitly instead of using `assert` so that the validation
    # is not stripped when running under `python -O`. The exception type is kept as
    # AssertionError so that existing callers observe the same failures as before.
    if found_title:
        # Rows are lists, so None unambiguously means the input was exhausted. This
        # avoids leaking a bare StopIteration when the file is truncated.
        separator = next(rows_iter, None)
        if separator is None:
            raise AssertionError(
                f'The input ended right after the title of the table. Table title was {table_title}')
        if separator != []:
            raise AssertionError(
                f'There should be an empty row after the title of the table, found {separator}')

        headers = next(rows_iter, None)
        if headers is None:
            raise AssertionError(
                f'Could not extract headers from the table, this is suspicious. Table title was {table_title}')

        for row in rows_iter:
            if row == []: # An empty row marks the end of the table
                break
            extracted.append(row)

    # This also covers the case where the title of the table was never found.
    if len(extracted) == 0:
        raise AssertionError(
            f'Could not extract rows from the table, this is suspicious. Table title was {table_title}')

    return (headers, extracted)

def main(argv):
    """
    Extract a result table from one or more SPEC CSV result files and print it.

    `argv` is the list of command-line arguments, without the program name. The
    selected table is parsed out of every input file, the rows are concatenated,
    entries that were not run or that failed are dropped unless asked otherwise,
    and the result is printed to standard output in the requested format.

    Raise a RuntimeError if the input files do not all have the same headers, if
    `--extract` is used with an output format other than CSV, or if `--extract`
    names a header that is not present in the table.
    """
    parser = argparse.ArgumentParser(
        prog='parse-spec-results',
        description='Parse SPEC result files (in CSV format) and extract the selected result table, in the selected format.')
    parser.add_argument('filename', type=argparse.FileType('r'), nargs='+',
        help='One or more CSV files to extract the results from. The results parsed from each file are concatenated '
             'together.')
    parser.add_argument('--table', type=str, choices=['full', 'selected'], default='full',
        help='The name of the table to extract from SPEC results. `full` means extracting the Full Results Table '
             'and `selected` means extracting the Selected Results Table. Default is `full`.')
    parser.add_argument('--output-format', type=str, choices=['csv', 'lnt'], default='csv',
        help='The desired output format for the data. `csv` is CSV format and `lnt` is a format compatible with '
             '`lnt importreport` (see https://llvm.org/docs/lnt/importing_data.html#importing-data-in-a-text-file).')
    parser.add_argument('--extract', type=str,
        help='A comma-separated list of headers to extract from the table. If provided, only the data associated to '
             'those headers will be present in the resulting data. Invalid header names are diagnosed. Please make '
             'sure to use appropriate quoting for header names that contain spaces. This option only makes sense '
             'when the output format is CSV.')
    parser.add_argument('--keep-not-run', action='store_true',
        help='Keep entries whose "Base Status" is marked as "NR" (aka "Not Run"). By default, such entries are discarded.')
    parser.add_argument('--keep-failed', action='store_true',
        help='Keep entries whose "Base Status" is marked as "CE" (aka "Compilation Error") or "RE" (aka "Runtime Error"). '
             'By default, such entries are discarded.')
    args = parser.parse_args(argv)

    # argparse restricts --table to these two choices, so the lookup cannot fail.
    table_title = {
        'full': 'Full Results Table',
        'selected': 'Selected Results Table',
    }[args.table]

    # Parse the headers and the rows in each file, aggregating all the results
    headers = None
    rows = []
    try:
        for file in args.filename:
            (parsed_headers, parsed_rows) = parse_table(csv.reader(file), table_title)
            # Raised explicitly (rather than asserted) so that mismatching files are
            # still diagnosed when running under `python -O`.
            if headers is not None and headers != parsed_headers:
                raise RuntimeError(f'Found files with different headers: {headers} and {parsed_headers}')
            headers = parsed_headers
            rows.extend(parsed_rows)
    finally:
        # argparse.FileType opened every file while parsing the arguments, so close
        # all of them here, including the ones we did not get to in case of an error.
        # `-` maps to sys.stdin, which we do not own and therefore leave open.
        for file in args.filename:
            if file is not sys.stdin:
                file.close()

    # Remove rows that were not run (or failed) unless we were asked to keep them
    status = headers.index('Base Status')
    discarded_statuses = set()
    if not args.keep_not_run:
        discarded_statuses.add('NR')
    if not args.keep_failed:
        discarded_statuses.update(('CE', 'RE'))
    if discarded_statuses:
        rows = [row for row in rows if row[status] not in discarded_statuses]

    if args.extract is not None:
        if args.output_format != 'csv':
            raise RuntimeError('Passing --extract requires the output format to be csv')
        requested_headers = args.extract.split(',')
        for h in requested_headers:
            if h not in headers:
                raise RuntimeError(f'Header name {h} was not present in the parsed headers {headers}')

        # Columns are emitted in the order requested on the command line.
        extracted_fields = [headers.index(h) for h in requested_headers]
        headers = [headers[i] for i in extracted_fields]
        rows = [[row[i] for i in extracted_fields] for row in rows]

    # Print the results in the right format
    if args.output_format == 'csv':
        writer = csv.writer(sys.stdout)
        writer.writerow(headers)
        writer.writerows(rows)
    elif args.output_format == 'lnt':
        benchmark = headers.index('Benchmark')
        run_time = headers.index('Est. Base Run Time')
        for row in rows:
            # Dots in the benchmark name are replaced by underscores before the
            # `.execution_time` suffix is appended.
            print(f'{row[benchmark].replace(".", "_")}.execution_time {row[run_time]}')

# Script entry point: forward the command-line arguments (without the program
# name) to main() when this file is executed directly rather than imported.
if __name__ == '__main__':
    cli_arguments = sys.argv[1:]
    main(cli_arguments)
